Load packages
library(tidyverse)
library(gridExtra)
library(cowplot)
library(ggridges)
library(ggstance)
library(treeio)
library(ggtree)
library(tidytree)
Load and combine data
species <- read_csv("../data/species_data.csv") %>% select(-species)
data <- read_csv("../data/data.csv") %>%
left_join(species, by = c("species" = "species_formatted")) %>%
rename(label = species_latin)
data2 <- data %>% group_by(species, label) %>% summarise(n = sum(n))
fulltree <- read.nexus("../data/consensusTree_10kTrees_298Primates_V3.nex")
refs <- read_csv("../data/ref_nodes.csv")
# turn tree into tidy dataframe
tree2 <- as_tibble(fulltree)
tree3 <- tree2 %>%
left_join(data2) %>%
mutate(
hasN = ifelse(is.na(n), 0, .5),
hasN2 = ifelse(is.na(n), .1, .5)) %>%
left_join(refs) %>%
# # also merge w datasheet listing node, n for inner nodes
# left_join(Ns, by = "node") %>%
# mutate(n = coalesce(n.x, n.y)) %>%
# select(-n.x, -n.y) %>%
groupClade(c(493, 496, 429, 302, 408)) %>%
mutate(group = fct_recode(group, "2" = "1"))
Joining, by = "label"
Joining, by = "node"
# turn back into tree
tree4 <- as.treedata(tree3)
This makes a rectangular and a circular tree with the node numbers displayed for reference (saved in the graphs folder).
tree3.2 <- as.treedata(tree3)
# display node numbers for reference
ggtree(tree3.2) +
# tip labels
geom_tippoint(aes(size = n), col = "seagreen", alpha = .5) +
geom_tiplab(offset = 1, size = 3) +
# node labels
geom_text(aes(label = node, x = branch), size = 2, col = "blue", vjust = -.5) +
expand_limits(x = 90) +
# display timescale at the bottom
theme_tree2()
ggsave("../graphs/full_tree_nodes.pdf", width = 8, height = 20, scale = 2)
ggtree(tree3.2, layout = "circular") +
geom_tippoint(aes(size = n), col = "seagreen", alpha = .5) +
geom_tiplab2(offset = 2, size = 3) +
geom_text2(aes(label = node), size = 1.5, col = "blue") +
xlim(NA, 100)
ggsave("../graphs/full_tree_nodes_circular.pdf", width = 8, height = 8, scale = 2)
cols <- viridis::viridis(4, end = .9)
# base plot
p <- ggtree(tree4, aes(size = hasN, alpha = hasN2), layout = "circular") +
# root
geom_rootpoint(size = 1) +
# tips
geom_tippoint(aes(size = n), alpha = .5) +
geom_tiplab2(aes(alpha = hasN), offset = 2, size = 3) +
# tweak scales
scale_alpha_continuous(range = c(.3, 1)) +
scale_size_continuous(range = c(.5, 8)) +
# widen plotting area
xlim(NA, 100)
# highlight clades with background colors
p2 <- p +
geom_hilight(node = 493, fill = cols[1], alpha = .3) +
geom_hilight(node = 496, fill = cols[1], alpha = .3) +
geom_hilight(node = 429, fill = cols[2], alpha = .3) +
geom_hilight(node = 303, fill = cols[3], alpha = .3) +
geom_hilight(node = 408, fill = cols[4], alpha = .3) +
# plot tree again to be on top of the highlights
geom_tree() +
geom_rootpoint(size = 1)
# highlight clades with branch colors
p3 <- p +
aes(col = group) +
scale_color_manual(values = c("gray30", cols))
plots <- mget(c("p", "p2", "p3"))
grid.arrange(p, p2, p3, nrow = 1)
# png with 3x1
ggsave("../graphs/phylo_full.png", arrangeGrob(grobs = plots, nrow = 1),
width = 24, height = 8, scale = 2, dpi = 72)
Removed 287 rows containing missing values (geom_point_g_gtree).Removed 287 rows containing missing values (geom_point_g_gtree).Removed 287 rows containing missing values (geom_point_g_gtree).
# pdf with 1 per page
ggsave("../graphs/phylo_full.pdf", marrangeGrob(grobs = plots, nrow = 1, ncol = 1),
width = 8, height = 8, scale = 2, dpi = 72)
Removed 287 rows containing missing values (geom_point_g_gtree).Removed 287 rows containing missing values (geom_point_g_gtree).Removed 287 rows containing missing values (geom_point_g_gtree).
# subset tree to just those species who have sample sizes reported, i.e. those who were tested
to_drop <- tree3 %>% filter(is.na(n)) %>% pull(label)
tree5 <- drop.tip(tree4, to_drop)
data3 <- data %>%
select(label, everything()) %>%
rename(num = n)
d3a <- data3 %>% group_by(species) %>% filter(n_distinct(site) > 2)
d3b <- setdiff(data3, d3a)
q <- ggtree(tree5, aes(col = group)) +
# this is a dummy point to expand the x scale
# the typical ways weirdly also expand it for the sample size panel once that's added
geom_point(data = tibble(x = 135, y = 1, .panel = "Tree", group = NA)) +
# tip labels
geom_tippoint(aes(size = n), alpha = .5) +
geom_tiplab(offset = 4, size = 3) +
# tweak scales
scale_color_manual(values = c("grey30", cols)) +
scale_fill_manual(values = cols[4]) + # when all categories are taken: cols
# display timescale at the bottom
theme_tree2()
ggplot(data3, aes(x = num, y = label, group = label)) +
geom_density_ridges(lwd = .3, col = "grey80", bandwidth = 1)
ggplot(data3, aes(x = num, y = label, group = label)) +
geom_point()
# add densities (or points when densities can't be calculated / are meaningless)
# dirty hack: there's a small character in front of "Sample sizes" to have that panel sort to the right (alphabetically) until I figure out why it doesn't just go by order. This cropped up as an issue when I added the dummy point for the x-axis expansion...
q %>%
facet_plot("Ù´ Sample sizes", d3b, geom_point, aes(x = num, group = label, alpha = .5)) %>%
facet_plot("Ù´ Sample sizes", d3a, geom_density_ridges, aes(x = num, group = label, fill = group),
alpha = .5, lwd = .3) +
panel_border()
ggsave("../graphs/phylo_ridge.png", width = 4, height = 3, scale = 2)
Relevant functions to tinker with, if necessary:
facet_plot
function (p, panel, data, geom, mapping = NULL, ...)
{
p <- add_panel(p, panel)
df <- p %+>% data
p + geom(data = df, mapping = mapping, ...)
}
<bytecode: 0x7fbcd8d8c2c0>
<environment: namespace:ggtree>
ggtree:::add_panel
function (p, panel)
{
df <- p$data
if (is.null(df[[".panel"]])) {
df[[".panel"]] <- factor("Tree")
}
levels(df$.panel) %<>% c(., panel)
p$data <- df
p + facet_grid(. ~ .panel, scales = "free_x")
}
<bytecode: 0x7fbcd8d8b1e8>
<environment: namespace:ggtree>
sessionInfo()
R version 3.6.1 (2019-07-05)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS Mojave 10.14.5
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] tidytree_0.2.8 ggtree_1.16.6 treeio_1.8.2 ggstance_0.3.3 ggridges_0.5.1
[6] cowplot_1.0.0 gridExtra_2.3 forcats_0.4.0 stringr_1.4.0 dplyr_0.8.3
[11] purrr_0.3.2 readr_1.3.1 tidyr_1.0.0 tibble_2.1.3 ggplot2_3.2.1
[16] tidyverse_1.2.1
loaded via a namespace (and not attached):
[1] tidyselect_0.2.5 xfun_0.10 reshape2_1.4.3 haven_2.1.1 lattice_0.20-38
[6] colorspace_1.4-1 vctrs_0.2.0 generics_0.0.2 viridisLite_0.3.0 rlang_0.4.0
[11] pillar_1.4.2 glue_1.3.1 withr_2.1.2 modelr_0.1.5 readxl_1.3.1
[16] rvcheck_0.1.5 lifecycle_0.1.0 plyr_1.8.4 munsell_0.5.0 gtable_0.3.0
[21] cellranger_1.1.0 rvest_0.3.4 labeling_0.3 knitr_1.25 parallel_3.6.1
[26] broom_0.5.2 Rcpp_1.0.2 BiocManager_1.30.7 scales_1.0.0 backports_1.1.5
[31] jsonlite_1.6 hms_0.5.1 stringi_1.4.3 grid_3.6.1 cli_1.1.0
[36] tools_3.6.1 magrittr_1.5 lazyeval_0.2.2 crayon_1.3.4 ape_5.3
[41] pkgconfig_2.0.3 zeallot_0.1.0 xml2_1.2.2 lubridate_1.7.4 viridis_0.5.1
[46] assertthat_0.2.1 httr_1.4.1 rstudioapi_0.10 R6_2.4.0 nlme_3.1-141
[51] compiler_3.6.1